
import numpy as np
import matplotlib.pyplot as plt
import random

#For each nucleotide, the three bases it is allowed to mutate into
#(every base except the nucleotide itself).
nuca = list('tcg')
nucg = list('tca')
nucc = list('tga')
nuct = list('agc')

#This function translates a string of nucleotides into amino acids
def translate(string, offset, table=None):
	"""Translate a nucleotide string into a string of one-letter amino acids.

	string -- nucleotide sequence; every codon read must be a key of the table
	offset -- 1-based position of the first base to read (1, 2 or 3 select
	          the three reading frames of the string)
	table  -- optional mapping from 3-letter codon to amino-acid letter;
	          defaults to the module-level `standard` table

	Codons are read left to right, three bases at a time, starting at
	`offset`. Trailing bases that do not fill a complete codon are ignored.
	The returned string always begins with a single space character (kept so
	that existing output does not change), followed by one letter per codon.

	Raises ValueError if offset is smaller than 1 and KeyError if a codon is
	not present in the table.
	"""
	if offset < 1:
		raise ValueError("offset must be >= 1, got %r" % (offset,))
	if table is None:
		table = standard
	start = offset - 1
	#A codon starting at index k needs k + 2 to be a valid index, so the last
	#usable start is len(string) - 3 whatever the offset is.
	codons = (string[k:k + 3] for k in range(start, len(string) - 2, 3))
	return " " + "".join(table[codon] for codon in codons)
	
#This function changes a nucleotide randomly into another
def changenuc(nuc):
	"""Return a randomly chosen nucleotide that differs from `nuc`.

	nuc -- one of the lowercase letters 'a', 't', 'g' or 'c'

	The replacement is drawn uniformly from the module-level list of
	alternatives for that base (nuca, nuct, nucg, nucc).

	The random generator is deliberately not reseeded here, so a caller can
	call random.seed(n) once beforehand to get a reproducible run.

	Raises ValueError for any other value of `nuc`.
	"""
	if nuc not in ('a', 't', 'g', 'c'):
		raise ValueError("unknown nucleotide: %r" % (nuc,))
	alternatives = {'a': nuca, 't': nuct, 'g': nucg, 'c': nucc}
	return random.choice(alternatives[nuc])
#This function returns a copy of the string in which each base is mutated with probability 1%
def mutate(s, rate=0.01):
	"""Return a copy of `s` in which each base has been mutated at random.

	s    -- nucleotide string; a base selected for mutation is passed to
	        changenuc, so it must be one that changenuc accepts
	rate -- probability (0.0 to 1.0) that any single base is replaced;
	        defaults to 0.01, i.e. 1% per base

	Every position is decided independently. The random generator is not
	reseeded here, so a caller can call random.seed(n) once beforehand to
	get a reproducible result.
	"""
	#random.random() is in [0.0, 1.0), so rate=0.0 never mutates and
	#rate=1.0 always does.
	return "".join(
		changenuc(base) if random.random() < rate else base
		for base in s)
			
	
#Reference DNA segment, stored as one lowercase string of nucleotide letters.
#NOTE(review): the name suggests it was taken from the p53 gene region - confirm the source.
p53segsource = "cggagcagctcactattcacccgatgagaggggaggagagagagagaaaatgtcctttaggccggttcctcttacttggcagagggaggctgctattctccgcctgcatttctttttctggattacttagttatggcctttgcaaaggcaggggtatttgttttgatgcaaacctcaatccctccccttctttgaatggtgtgccccaccccccgggtcgcctgcaacctaggcggacgctaccatggcgtagacagggagggaaagaagtgtgcagaaggcaagcccggaggcactttcaagaatgagcatatctcatcttcccggagaaaaaaaaaaaagaatggtacgtctgagaatgaaattttgaaagagtgcaatgatgggtcgtttgataatttgtcgggaaaaacaatctacctgttatctagctttgggctaggccattccagttccagacgcaggctgaacgtcgtgaagcggaaggggcgggcccgcaggcgtccgtgtggtcctccgtgcagccctcggcccgagccggttcttcctggtaggaggcggaactcgaattcatttctcccgctgccccatctcttagctcgcggttgtttcattccgcagtttcttcccatgcacctgccgcgtaccggccactttgtgccgtacttacgtcatctttttcctaaatcgaggtggcatttacacacagcgccagtgcacacagcaagtgcacaggaagatgagttttggcccctaaccgctccgtgatgcctaccaagtcacagacccttttcatcgtcccagaaacgtttcatcacgtctcttcccagtcgattcccgaccccacctttattttgatctccataaccattttgcctgttggagaacttcatatagaatggaatcaggatgggcgctgtggctcacgcctgcactttggctcacgcctgcactttgggaggccgaggcgggcggattacttgaggataggagttccagaccagcgtggccaacgtggtg"

#Name used by the analysis code in this file; it refers to the same string as p53segsource.
p53seg = p53segsource

#Codon table: lowercase 3-letter codon -> one-letter amino acid ('*' marks a stop).
#Keys are generated with the first base outermost, then the third base, then
#the second base (each running over t, c, a, g) and paired in that order with
#the letters below. Each 4-letter group therefore covers second base t, c, a, g
#for one (first base, third base) combination.
standard = dict(zip(
	(first + second + third
		for first in 'tcag'
		for third in 'tcag'
		for second in 'tcag'),
	#third base: t, c, a, g
	'FSYC' 'FSYC' 'LS**' 'LS*W'   #first base t
	'LPHR' 'LPHR' 'LPQR' 'LPQR'   #first base c
	'ITNS' 'ITNS' 'ITKR' 'MTKR'   #first base a
	'VADG' 'VADG' 'VAEG' 'VAEG'   #first base g
))

#Below is the code for problem one: the fraction of bases that are g or c
gccontent = sum(p53seg.count(base) for base in 'gc')
gcratio = float(gccontent) / len(p53seg)
print(gcratio)

#Below is the code for problem 2: the reverse complement of the segment
#An explicit complement table is used rather than swapping letters through
#temporary placeholder characters, because placeholders such as 'b' and 'd'
#are themselves IUPAC nucleotide codes and would be rewritten if they ever
#appeared in the input. Characters without an entry are copied unchanged.
complement = {'a': 't', 't': 'a', 'g': 'c', 'c': 'g'}
#Walk the sequence back to front and complement each base on the way.
reverse = "".join(complement.get(base, base) for base in reversed(p53seg))
print(reverse)

#Below is the code for problem 3 though the translate function is defined at the top
#Print all six reading frames: +1..+3 read the segment itself,
#-1..-3 read its reverse complement.
for sign, strand in (('+', p53seg), ('-', reverse)):
	for frame in (1, 2, 3):
		print("Frame %s%d" % (sign, frame))
		print(translate(strand, frame))

#Below is the code for problem 4 though functions are defined at the top
#Mutate the segment repeatedly and estimate how often a mutated copy has more
#stop codons ('*') in reading frame +1 than the unmutated segment.
trials = 1000
prematurecount = 0
originalterminations = translate(p53seg,1).count('*')
#print(...) with one pre-formatted string behaves the same on Python 2 and 3
print("There are %d terminations in the original sequence" % (originalterminations))
for count in range(1, trials + 1):
	mutated = mutate(p53seg)
	translation = translate(mutated, 1)
	#Show the first five mutated translations as a sample
	if count <= 5:
		print(translation)
	terminations = translation.count('*')
	#More stops than the original counts as a premature termination
	if terminations > originalterminations:
		prematurecount += 1
#float() keeps this a true division on Python 2 as well
frequency = prematurecount / float(trials)
print("The frequency of early terminations is %f" % (frequency, ))